In [1]:
import pymysql
db = pymysql.connect(
"db.fastcamp.us",
"root",
"dkstncks",
"sakila",
charset='utf8',
)
rental_df = pd.read_sql("SELECT * FROM rental;", db)
In [7]:
rental_df = rental_df[["rental_id", "rental_date"]]
In [8]:
rental_df["month"] = rental_df["rental_date"].apply(lambda x: str(x)[:7])
rental_df.head()
Out[8]:
In [9]:
month_groups = rental_df.groupby("month")
In [12]:
month_groups.get_group("2005-05")
Out[12]:
In [10]:
rental_df["month"].unique()
Out[10]:
In [16]:
len(month_groups.get_group("2005-05"))
Out[16]:
In [13]:
pd.DataFrame([
{
"month": month,
"rental count": len(month_groups.get_group(month)),
}
for month
in rental_df["month"].unique()
])
Out[13]:
In [18]:
rental_df.groupby("month")
Out[18]:
In [17]:
rental_df.groupby("month").size()
Out[17]:
In [19]:
rental_df.groupby("month").agg({"rental_id": np.size})
Out[19]:
In [24]:
SQL_QUERY = """
SELECT LEFT(rental_date, 7) "Month", COUNT(*) "Total Rental"
FROM rental
GROUP BY Month
;
"""
pd.read_sql(SQL_QUERY, db)
Out[24]:
In [39]:
SQL_QUERY = """
SELECT *
FROM payment
;
"""
payment_df = pd.read_sql(SQL_QUERY, db)
In [40]:
payment_df = payment_df[["payment_date", "payment_id", "amount"]]
In [41]:
payment_df.head(1)
Out[41]:
In [42]:
payment_df["month"] = payment_df["payment_date"].apply(lambda x: str(x)[:7])
In [46]:
payment_df.groupby("month").agg({"payment_id": np.size, "amount": [np.sum, np.mean]})
Out[46]:
In [47]:
# 마지막으로 sql
In [54]:
SQL_QUERY = """
SELECT
LEFT(payment_date, 7) Month,
count(*) "Total Payment",
SUM(amount) "Total Revenue",
AVG(amount) "Average Payment"
FROM payment
GROUP BY Month
;
"""
pd.read_sql(SQL_QUERY, db)
Out[54]: